from pyspark.context import SparkContext

# Count how many lines of students.txt share the same last comma-separated
# field, then print the (field, count) pairs from most to least frequent.
# NOTE(review): the last field is presumably the student's class -- confirm
# against the layout of students.txt.
sc = SparkContext(master="local", appName="word_count")

try:
    # Read the data: one RDD element per line of the text file.
    students_rdd = sc.textFile("../../data/students.txt")

    # Drop blank lines; otherwise each one would be counted under the
    # empty-string key, because "".split(",")[-1] == "".
    non_blank_rdd = students_rdd.filter(lambda line: bool(line.strip()))

    # Key every line by its last comma-separated field, with a count of 1.
    kv_rdd = non_blank_rdd.map(lambda line: (line.rsplit(",", 1)[-1], 1))

    # Sum the counts per key.
    num_rdd = kv_rdd.reduceByKey(lambda x, y: x + y)

    # Order by count, largest first.
    sort_rdd = num_rdd.sortBy(lambda kv: kv[1], ascending=False)

    # Bring the aggregated result back to the driver before printing:
    # foreach(print) would run print inside the worker processes, where the
    # output location and ordering are not guaranteed.
    for pair in sort_rdd.collect():
        print(pair)
finally:
    # Always release the Spark context, even when a job above fails.
    sc.stop()